#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

import importlib, sys

default_encoding = 'utf-8'
# Legacy idiom for forcing the interpreter-wide default string encoding.
# sys.setdefaultencoding() is not part of Python 3's sys module, so it is
# looked up defensively after the reload and only called when it exists;
# otherwise this branch would die with AttributeError instead of being a no-op.
if sys.getdefaultencoding() != default_encoding:
    importlib.reload(sys)
    _set_default_encoding = getattr(sys, "setdefaultencoding", None)
    if _set_default_encoding is not None:
        _set_default_encoding(default_encoding)

# Maps "head_type | tail_type | relation" to a list holding the relation once
# for every schema row that produced that key. Because the relation is part of
# the key, a list longer than one means the same triple appears on several rows.
ht = {}

# The context manager guarantees the file is closed even if a row fails to parse.
with open("D:/Seafile/Work/生物岛知识图谱/Schema/schema_20230607.tsv", "r", encoding='utf-8') as infile:
    infile.readline()  # discard the first line (treated as a header row)
    for line in infile:
        line = line.rstrip("\n")
        if not line:
            # A blank line (e.g. a trailing newline at end of file) has no
            # columns and would otherwise raise IndexError below.
            continue
        splitline = line.split("\t")

        # Column 0 is the relation; columns 2 and 3 are the head and tail types.
        # Column 1 is not used by this script.
        relation = splitline[0]
        head_type = splitline[2]
        tail_type = splitline[3]

        head_tail = head_type + " | " + tail_type + " | " + relation
        ht.setdefault(head_tail, []).append(relation)

# Write one line per key that was collected more than once:
# "<head_type | tail_type | relation>\t<list of collected relations>".
# The encoding is set explicitly to UTF-8 to match the input file; without it
# the platform's locale encoding is used, which can fail or garble non-ASCII
# text. The context manager flushes and closes the file on exit, including
# when a write raises, so no per-iteration flush is needed.
with open("D:/Seafile/Work/生物岛知识图谱/Schema/schema_duplicate_20230607_2.tsv", "w", encoding='utf-8') as outfile:
    for key, relations in ht.items():
        if len(relations) > 1:
            outfile.write("%s\t%s\n" % (key, str(relations)))
